* ------------------------------------------------------------------
* Session setup: clear memory, close any open log, define the project
* directories as global macros, and open the log file for this run.
* ------------------------------------------------------------------
clear 
clear matrix
set memory 1000m
set more off
cap log close

cd "/Users/..."

* Project directories. The literals below are plain ASCII: the earlier
* versions of do_file and raw_data carried invisible Unicode directional
* marks (copied along with the path), which made those paths invalid.
global do_file "/Users/.../do_file"
global log_file "/Users/.../log_file"
global raw_data "/Users/.../raw_data"
global working_data "/Users/.../working_data"
global results "/Users/.../results/072020/permutation"


* Write the log under $log_file (the global must be dereferenced with $;
* without it the path is taken relative to the current directory).
log using "$log_file/permutation_072020.log", replace


**************************************************
**************************************************
* Permutation test: shuffle the hard-brake indicators
**************************************************
**************************************************


* Load the day-level data from $working_data (the global must be
* dereferenced with $; a bare "working_data/..." is a relative path).
use "$working_data/clean_day_level_geo_weather_081519.dta", clear

sort user_id trip_start_date


* 0/1 indicators to be permuted.
* In Stata a missing numeric value compares greater than any number, so
* "x>0" alone is true when x is missing. The !missing() guard keeps rows
* with missing counts at 0 instead of silently flagging them as 1.
gen byte hb1 = total_prev_hard_brake>0 & !missing(total_prev_hard_brake)

gen byte hb2 = total_prev_hard_brake>0 & total_hard_turn>0 ///
	& !missing(total_prev_hard_brake, total_hard_turn)


sum hb1 hb2

* Fix the random-number seed so the 20 permutations can be reproduced.
* The particular value is arbitrary.
set seed 72020

* One random sort key per permutation. Stored as double so that ties
* between keys (whose order sort would break arbitrarily) are negligible.
forvalues i=1(1)20{
	gen double random_num`i'=runiform()
}

* Row identifier in the user_id / trip_start_date order; used later as the
* merge key. long keeps it exact for any number of rows (the default float
* type cannot represent every integer above 16,777,216).
gen long temp_id=_n


* Shuffle within user.
* For each permutation i the data are sorted by user_id and then by the
* i-th random key, so rows are reordered at random inside each user while
* every user's rows stay together. The hb1/hb2 values in that order are
* written to disk with a fresh row counter, to be merged back by position.

forvalues i=1(1)20{

	sort user_id random_num`i'
	preserve 

	keep hb1 hb2
	* long (not the default float) so the row counter stays exact on very
	* large datasets; a float counter would produce duplicate keys above
	* 16,777,216 rows and break the later 1:1 merge.
	gen long temp_id=_n
	rename hb1 hb1_within_r`i'
	rename hb2 hb2_within_r`i'
	* Path quoted so a results directory containing spaces still works.
	save "$results/shuffle_within_r`i'.dta", replace 

	restore
	
}


* Shuffle across all observations.
* For each permutation i the whole dataset is sorted by the i-th random
* key alone, so hb1/hb2 are reordered without regard to user. The values
* in that order are written to disk with a fresh row counter, to be merged
* back by position.

forvalues i=1(1)20{

	sort random_num`i'
	preserve 

		keep hb1 hb2
		* long (not the default float) so the row counter stays exact on
		* very large datasets; a float counter would produce duplicate keys
		* above 16,777,216 rows and break the later 1:1 merge.
		gen long temp_id=_n
		rename hb1 hb1_all_r`i'
		rename hb2 hb2_all_r`i'
		* Path quoted so a results directory containing spaces still works.
		save "$results/shuffle_all_r`i'.dta", replace 

	restore

}



* Attach every shuffled column to the original rows by row position
* (temp_id), then delete the intermediate files.
sort temp_id

forvalues i=1(1)20{

	* Keep the within-then-all order: later code selects these columns
	* (and their differences) through variable ranges, which depend on the
	* order in which the variables were added.
	foreach kind in within all {

		* assert(match) stops the run if any row fails to pair up, instead
		* of silently discarding the merge result; nogenerate replaces the
		* separate "drop _merge".
		merge 1:1 temp_id using "$results/shuffle_`kind'_r`i'.dta", assert(match) nogenerate
		erase "$results/shuffle_`kind'_r`i'.dta"
	}
}

* The random sort keys are no longer needed.
drop random_num1-random_num20
 
save "$results/permutation_all_082519.dta", replace
 
 

** First differences of the permuted indicators

use $results/permutation_all_082519.dta, clear


* Number each user's days consecutively in date order and declare the
* data as a panel on that counter, so lag/difference operators step from
* one recorded day to the previous recorded day of the same user.
sort user_id trip_start_date
by user_id (trip_start_date): gen day_index=_n

tsset user_id day_index


* D.x is x minus its one-period lag. The variables are created in the
* order hb1_within, hb2_within, hb1_all, hb2_all for each permutation,
* because later code addresses them through a variable range.
forvalues r=1/20 {
	foreach scope in within all {
		foreach k in 1 2 {
			gen diff_hb`k'_`scope'_r`r'=D.hb`k'_`scope'_r`r'
		}
	}
}



************************************
* Gen first-diff for risky behavior
************************************

* Every variable below is the current value minus the previous period's
* value within user (D. operator under the panel declared earlier).

* Outcomes stored with a total_ prefix
foreach v in agg_acc phone_use {
	gen diff_`v'=D.total_`v'
}

* Traffic exposure (the second name does not follow the source name)
gen diff_traffic_jam=D.total_traffic_jam
gen diff_traffic_time=D.total_traffic_sec

* Trip characteristics whose diff_ name matches the source name
foreach v in distance duration speed speed2 drive_at_night {
	gen diff_`v'=D.`v'
}

* Night-driving measures (names differ from the source names)
gen diff_night_prop=D.night_duration_prop
gen diff_night_duration=D.total_night_duration

* Calendar indicators, scores, gap time and highway
foreach v in weekend rush_hour control_score cautious_score focused_score drive_score gap_time highway {
	gen diff_`v'=D.`v'
}

** First differences for the weather variables
foreach v in sunny rain snow rain_storm cloudy high_temper low_temper {
	gen diff_`v'=D.`v'
}





***************************
*** Regressions
***************************

* Differenced regressors treated as exogenous in every model below.
local exog diff_traffic_jam diff_weekend diff_rush_hour ///
	diff_high_temper diff_low_temper diff_sunny diff_rain diff_snow

* Twice-lagged levels used as excluded instruments in every model of the
* loop, in addition to the twice-lagged level of the outcome itself.
local common_iv l(2).drive_score l(2).gap_time l(2).weekend l(2).rush_hour ///
	l(2).total_traffic_sec l(2).high_temper l(2).low_temper l(2).sunny l(2).rain l(2).snow


* This first model only exists to create the output table (replace);
* all models of interest are appended to it in the loop.
ivregress gmm diff_agg_acc l.diff_hb1_within_r1 `exog' l.diff_drive_score diff_gap_time ///
	(l.diff_agg_acc = l(2).total_agg_acc), vce(cluster user_id) nocons
outreg2 using $results/permutation_072020.xls, replace keep(l.diff_hb1_within_r1) 


* For every differenced permuted indicator, estimate the same GMM model
* for six outcomes and append the indicator's lagged coefficient to the
* table. Outcome order: phone_use, distance, duration, speed,
* drive_at_night, highway.
foreach nm of varlist diff_hb1_within_r1-diff_hb2_all_r20{

	foreach y in phone_use distance duration speed drive_at_night highway {

		* The level of phone use is stored as total_phone_use; the other
		* outcomes' levels carry the same name as the outcome.
		local lvl = cond("`y'"=="phone_use", "total_phone_use", "`y'")

		ivregress gmm diff_`y' `exog' ///
			(l.diff_`y' l.`nm' l.diff_drive_score diff_gap_time ///
			= l(2).`lvl' `common_iv'), vce(cluster user_id) nocons
		outreg2 using $results/permutation_072020.xls, append keep(l.`nm')
	}

}















* Read back the text version of the regression table.
import delimited $results/permutation_072020.txt, clear


* Drop the first two columns (presumably the row labels and the
* table-starting model - confirm against the file) and keep only rows
* 4 through 163.
drop v1 v2

drop if !inrange(_n, 4, 163)


* Shift the remaining columns down by two so they are numbered v1-v480.
forvalues old=3/482 {

	local new=`old'-2
	rename v`old' v`new'

}

* Cut the table into 80 groups of six consecutive columns and save each
* group as its own small dataset with one column per outcome.
forvalues g=1/80 {

	* Last and first column number of group g
	local hi=6*`g'
	local lo=`hi'-5

	preserve

		keep v`lo'-v`hi'
		* Keep only rows where the group's first column is filled in
		drop if v`lo'==""

		* Name the six columns after the outcomes, in table order
		local col=`lo'
		foreach outcome in phone_use distance duration speed drive_at_night highway {
			rename v`col' `outcome'
			local ++col
		}

		* Drop rows whose phone_use cell starts with "(" (presumably the
		* standard-error rows of the table - confirm against the file)
		gen temp=substr(phone_use, 1, 1)
		drop if temp=="("

		save "$results/coef_temp_`g'.dta", replace

	restore
}

* Stack the 80 coefficient files into one dataset, deleting each file once
* it has been read. Files are taken in strides of four: 1, 5, ..., 77,
* then 2, 6, ..., 78, then 3, 7, ..., 79, then 4, 8, ..., 80.
use "$results/coef_temp_1.dta", clear
erase "$results/coef_temp_1.dta"

foreach i of numlist 5(4)77 2(4)78 3(4)79 4(4)80 {
	append using "$results/coef_temp_`i'.dta"
	erase "$results/coef_temp_`i'.dta"
}






